# File system routine for colab
import os, sys
# Folder (relative to "My Drive") that holds the project. Add the folder to
# your Google Drive and change this value before running on Colab.
my_path = "EPFLectures2/DataViz/happiviz/"
# True when the google.colab module is already loaded in this interpreter.
IN_COLAB = 'google.colab' in sys.modules


class FileSystem:
    """Build project file paths for either Google Colab or a local checkout.

    On Colab the constructor mounts Google Drive at ``/gdrive`` and uses
    ``/gdrive/My Drive/<colab_dir>`` as the project root; otherwise the root
    is ``local_dir``. Until :meth:`cd` is called, the path helpers prefix
    every name with the root; afterwards they return paths relative to the
    (new) working directory.

    Args:
        colab_dir: Project folder inside "My Drive" (used only on Colab).
        local_dir: Project root when not running on Colab.
        data_dir: Name of the data sub-folder inside the project root.
    """

    def __init__(self, colab_dir=my_path, local_dir="./", data_dir="data"):
        if IN_COLAB:
            # Imported lazily: the package is only needed on the Colab branch.
            from google.colab import drive
            drive.mount('/gdrive')
            self.root_dir = os.path.join("/gdrive/My Drive/", colab_dir)
        else:
            self.root_dir = local_dir
        self.data_dir = data_dir
        # Flipped to True by cd(); switches the helpers to relative paths.
        self.change_directory = False

    def data_path(self, name):
        """Return the path of ``name`` inside the data folder."""
        if self.change_directory:
            return os.path.join(self.data_dir, name)
        return os.path.join(self.root_dir, self.data_dir, name)

    def path(self, name):
        """Return the path of ``name`` inside the project root."""
        if self.change_directory:
            return os.path.join("./", name)
        return os.path.join("./", self.root_dir, name)

    def cd(self):
        """Make the project root the working directory and list its entries.

        Uses ``os.chdir`` / ``os.listdir`` instead of the IPython ``%cd`` and
        ``%ls`` magics so the method is plain Python and also works outside a
        notebook.
        """
        os.chdir(self.root_dir)
        print(os.getcwd())
        for entry in sorted(os.listdir()):
            print(entry)
        self.change_directory = True
import subprocess

# Build the path helper and switch the working directory to the project root.
fs = FileSystem()
fs.cd()

# Install the plotting packages with the interpreter that is running this
# code, so they land in the environment the imports below will use. This
# replaces the notebook-only "!pip install ..." shell escapes, which are not
# valid Python. check=False keeps a failed install non-fatal, as the shell
# escape was; output is captured and printed so it still shows up in a
# notebook cell.
_pip_run = subprocess.run(
    [sys.executable, "-m", "pip", "install", "plotly", "chart-studio"],
    capture_output=True,
    text=True,
    check=False,
)
print(_pip_run.stdout, _pip_run.stderr, sep="")
# Source: https://www.kaggle.com/dhanyajothimani/basic-visualization-and-clustering-in-python
# (modified to work on Google Colab).
# Load the required libraries.
import time # To time processes
import warnings # To suppress warnings
import numpy as np # Data manipulation
import pandas as pd # Dataframe manipulation
import matplotlib.pyplot as plt # For graphics
import seaborn as sns # Used below for the correlation heat map
import chart_studio.plotly as py #For World Map
import plotly.graph_objs as go # Figure objects for the choropleth maps
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Initialise Plotly's notebook mode once at import time (connected=True).
init_notebook_mode(connected=True)
from sklearn.preprocessing import StandardScaler # For scaling dataset
from sklearn.cluster import KMeans, AgglomerativeClustering, AffinityPropagation #For clustering
from sklearn.mixture import GaussianMixture #For GMM clustering
import os # For os related operations
import sys # For data size
def enable_plotly_in_cell():
    """Prepare the current notebook output cell for rendering Plotly figures.

    Emits a ``<script>`` tag that loads require.js from the notebook's static
    path, then initialises Plotly's notebook mode with ``connected=False``.
    This file calls it immediately before drawing a figure.
    """
    # Import display/HTML explicitly from the public IPython.display module
    # rather than relying on the `display` name that only an interactive
    # IPython session injects, and on the internal IPython.core.display path.
    from IPython.display import HTML, display
    from plotly.offline import init_notebook_mode

    display(HTML('''<script src="/static/components/requirejs/require.js"></script>'''))
    init_notebook_mode(connected=False)
# Load the 2017.csv happiness table from the project's GitHub repository.
wh = pd.read_csv("https://raw.githubusercontent.com/com-480-data-visualization/com-480-project-datavaders/master/2017.csv")
# Summary statistics; as a bare expression this is only rendered when it is
# the last statement of a notebook cell.
wh.describe()
# Interpolate the actual shape: the previous string literal contained the
# text "wh.shape" and printed it verbatim instead of the dimensions.
print(f"Dimension of dataset: {wh.shape}")
# Column dtypes (bare expression, same notebook-display caveat as above).
wh.dtypes
# Keep only the score and the columns it is compared against.
wh1 = wh[['Happiness.Score', 'Economy..GDP.per.Capita.', 'Family',
          'Health..Life.Expectancy.', 'Freedom', 'Generosity',
          'Trust..Government.Corruption.', 'Dystopia.Residual']]
cor = wh1.corr()  # pairwise correlation of the selected columns
sns.heatmap(cor, square = True)  # draw the correlation matrix as a heat map
# Ref: https://plot.ly/python/choropleth-maps/
enable_plotly_in_cell()

# Choropleth trace: countries are matched by name and coloured by their
# happiness score; the country name is also used as the hover text.
data = {
    'type': 'choropleth',
    'locations': wh['Country'],
    'locationmode': 'country names',
    'z': wh['Happiness.Score'],
    'text': wh['Country'],
    'colorbar': {'title': 'Happiness'},
}

# Map layout: titled, frameless, Mercator projection.
layout = {
    'title': 'Happiness Index 2017',
    'geo': {
        'showframe': False,
        'projection': {'type': 'mercator'},
    },
}

choromap3 = go.Figure(data=[data], layout=layout)
iplot(choromap3)
# Same world map again, this time without calling enable_plotly_in_cell()
# first. The trace colours each country (matched by name) by its score.
data = {
    'type': 'choropleth',
    'locations': wh['Country'],
    'locationmode': 'country names',
    'z': wh['Happiness.Score'],
    'text': wh['Country'],
    'colorbar': {'title': 'Happiness'},
}

# Titled, frameless map in Mercator projection.
layout = {
    'title': 'Happiness Index 2017',
    'geo': {
        'showframe': False,
        'projection': {'type': 'mercator'},
    },
}

choromap3 = go.Figure(data=[data], layout=layout)
iplot(choromap3)
import plotly.express as px

# Bubble chart of rank against score: one marker per country, sized by the
# score, coloured and labelled by country name.
fig = px.scatter(
    data_frame=wh,
    x="Happiness.Rank",
    y="Happiness.Score",
    animation_group="Country",
    size="Happiness.Score",
    color="Country",
    hover_name="Country",
)
fig.show()
# Rank-versus-score bubble chart, drawn after preparing the output cell with
# enable_plotly_in_cell().
enable_plotly_in_cell()
import plotly.express as px

fig = px.scatter(
    data_frame=wh,
    x="Happiness.Rank",
    y="Happiness.Score",
    animation_group="Country",
    size="Happiness.Score",
    color="Country",
    hover_name="Country",
)
fig.show()

# The string literal below is never assigned or executed; it holds a draft of
# a two-year animated version of the chart above.
# NOTE(review): the draft selects lowercase column names ('rank', 'country',
# ...) but its scatter call still uses "Happiness.Rank"/"Country", and it
# tags the table read from 2017.csv as year 2018 -- reconcile before enabling.
"""d2019 = pd.read_csv("https://raw.githubusercontent.com/com-480-data-visualization/com-480-project-datavaders/master/2019.csv")
coltoselect = ['rank', 'country', 'region', 'score', 'gdp_per_capita','healthy_life_expectancy', 'freedom_to_life_choice', 'generosity','corruption_perceptions']
wh = wh.loc[:,coltoselect].copy()
d2019 = d2019.loc[:,coltoselect].copy()
wh["year"] = 2018
d2019["year"] = 2019
finaldf = d2019.append([wh])
fig = px.scatter(finaldf, x="Happiness.Rank", y="Happiness.Score", animation_frame="year",
animation_group="Country",
size="Happiness.Score", color="Country", hover_name="Country")
fig.show()"""